#Alexander F. Gazmararian
#afg2@princeton.edu

#Load packages
library(tidyverse)
library(tidylog)
library(stm)
library(gsheet)
library(here)

# For replication
set.seed(10)

# Load data
g <- readRDS(here("data", "inter", "umwj.rds"))

# Create combined text.
# paste() coerces missing values to the literal string "NA". Blank out
# missing fields *before* pasting rather than stripping "NA" afterwards:
# gsub("NA", "", ...) on the pasted text would also delete those two letters
# from inside genuine words (e.g. "NATIONAL" -> "TIONAL").
blank_if_na <- function(x) {
  x <- as.character(x)
  x[is.na(x)] <- ""
  x
}
g$text <- paste(blank_if_na(g$Title), blank_if_na(g$Contents))

# Prepare covariates: Year is converted to a factor before it is used in the
# prevalence formula of the topic model.
g$Year <- factor(g$Year)
# Process for STM: run the text through stm's text processor (keeping the
# full data frame as document metadata), then prepare the documents.
processed <- textProcessor(
  g$text,
  metadata = g
)
out <- prepDocuments(
  processed[["documents"]],
  processed[["vocab"]],
  processed[["meta"]]
)
docs <- out[["documents"]]
vocab <- out[["vocab"]]
meta <- out[["meta"]]

# Data-driven approach to determine number of topics: evaluate the candidate
# values of K with topic prevalence modelled as a function of Year.
select <- searchK(
  documents = docs,
  vocab = vocab,
  K = c(5, 7, 10),
  data = meta,
  prevalence = ~ Year,
  init.type = "Spectral"
)

# Fit the model with K = 7 topics using the same specification.
stm.fit <- stm(
  documents = docs,
  vocab = vocab,
  K = 7,
  data = meta,
  prevalence = ~ Year,
  init.type = "Spectral"
)
# Figure B2: plot the fitted topic model to a PNG file
# (6.5 x 5 inches at 300 dpi).
png(
  filename = here("output", "figures", "si_fig_B2_stm.png"),
  width = 6.5,
  height = 5,
  units = "in",
  res = 300
)
plot(stm.fit)
dev.off()
